<?php
// Raise PHP's memory limit to 1024M for this script run.
ini_set('memory_limit', '1024M');

// Load the framework bootstrap that lives in ../core relative to this file
// (presumably what provides the phpspider class used further down).
require __DIR__ . '/../core/init.php';

/* Do NOT delete this comment */
/* 不要删除这段注释 */

// Output field name => label text that the field's regex selector looks for
// inside an "<li><span>LABEL:</span>VALUE</li>" fragment. The order of this
// map is the order in which the fields end up in $configs['fields'].
$profile_stat_labels = [
    'user_fans_num'         => '粉丝数',
    'user_follow_num'       => '关注数',
    'user_publish_num'      => '糗事',
    'user_comment_num'      => '评论',
    'user_praise_num'       => '笑脸',
    'user_best_publish_num' => '糗事精选',
    'user_marriage'         => '婚姻',
    'user_constellation'    => '星座',
    'user_jobs'             => '职业',
    'user_hometown'         => '故乡',
    'user_qbage'            => '糗龄',
];

// Expand the map into one regex-type field definition per entry; the single
// capture group holds everything between "</span>" and "</li>".
$profile_stat_fields = [];
foreach ($profile_stat_labels as $stat_name => $stat_label) {
    $profile_stat_fields[] = [
        'name' => $stat_name,
        'selector' => '@<li><span>' . $stat_label . ':</span>(.*)</li>@',
        'selector_type' => 'regex',
    ];
}

// Crawler configuration handed to the phpspider constructor.
$configs = [
    'name' => '糗事百科用户',
    // Optional switches, currently disabled:
    //'log_show' => true,
    'tasknum' => 1,
    //'save_running_state' => true,
    'domains' => [
        'qiushibaike.com',
        'www.qiushibaike.com',
    ],
    // NOTE(review): this entry contains the regex fragment "\d+" although the
    // key name suggests concrete start URLs -- confirm against the phpspider
    // docs and replace it with a real profile URL if so.
    'scan_urls' => [
        'http://www.qiushibaike.com/users/\d+',
    ],
    'list_url_regexes' => [
        'http://www.qiushibaike.com/users/\d+/followers',
    ],
    'content_url_regexes' => [
        'http://www.qiushibaike.com/users/\d+',
    ],
    'max_try' => 5,
    // Results are exported to the "qiubai_users" database table.
    'export' => [
        'type' => 'db',
        'table' => 'qiubai_users',
    ],
    'fields' => array_merge(
        [
            // Avatar link href; reduced to the bare id by the
            // on_extract_field hook later in this script.
            [
                'name' => 'user_id',
                'selector' => "//a[contains(@class,'user-header-avatar')]/@href",
                'required' => true,
            ],
            [
                'name' => 'user_name',
                'selector' => "//div[contains(@class,'user-header-cover')]//h2",
                'required' => true,
            ],
        ],
        $profile_stat_fields,
        [
            // Same selector as user_name; the extracted text is discarded and
            // replaced with time() by the on_extract_field hook.
            [
                'name' => 'created_at',
                'selector' => "//div[contains(@class,'user-header-cover')]//h2",
            ],
        ]
    ),
];

// Build the crawler from $configs.
$spider = new phpspider($configs);

/**
 * Field post-processing hook: normalises the raw value of selected fields.
 *
 * (Presumably invoked by phpspider once per extracted field -- confirm
 * against the framework.)
 *
 * - created_at: the extracted value is ignored and replaced with the current
 *   Unix timestamp.
 * - user_qbage: keeps only the text in front of the first "天"; a value that
 *   does not contain "天" is returned unchanged instead of being wiped to "".
 * - user_id:    reduces a profile href such as "/users/123/" or "/users/123"
 *   to the numeric id "123"; a value without a "users/<digits>" part is
 *   returned unchanged.
 * - any other field, and any non-string value, is returned untouched.
 *
 * @param string $fieldname Name of the field, as declared in $configs['fields'].
 * @param mixed  $data      Raw extracted value.
 * @param mixed  $page      Unused here.
 * @return mixed The cleaned value.
 */
$spider->on_extract_field = function($fieldname, $data, $page) {
    if ($fieldname === 'created_at') {
        return time();
    }

    // The string functions below are only meaningful on string payloads.
    if (!is_string($data)) {
        return $data;
    }

    if ($fieldname === 'user_qbage') {
        // strpos() yields false when the marker is missing; feeding that into
        // substr() as a length would silently produce an empty string.
        $marker_pos = strpos($data, '天');
        if ($marker_pos !== false) {
            $data = substr($data, 0, $marker_pos);
        }
    } elseif ($fieldname === 'user_id') {
        // Works with or without a trailing slash / extra path after the id.
        if (preg_match('@users/(\d+)@', $data, $id_match)) {
            $data = $id_match[1];
        }
    }

    return $data;
};

$spider->start();

